We identify the top methods for network structure prediction by computing the area under the receiver operating characteristic (AUROC) and accuracy (ACC) row sum and median values for each of the 28 networks. We weight these variables at 5 different ranges:
for (sim in (1:28)){
AUC<-read.csv(paste0("sim",sim,"/Combined_Data_onlyAUC.csv"),row.names = 1)
ACC<-read.csv(paste0("sim",sim,"/Combined_Data_onlyACC.csv"),row.names = 1)
#calculate AUC only sums and medians
AUC_sum<-cbind(AUC$Method_name,rowSums(AUC[,-1])) %>% as.data.frame()
colnames(AUC_sum)<-c("Method_name","Sum_of_avg_across_patients")
AUC_sum$Sum_of_avg_across_patients<-as.numeric(AUC_sum$Sum_of_avg_across_patients)
AUC_median<-cbind(AUC$Method_name,apply(AUC[-1], 1, median)) %>% as.data.frame()
colnames(AUC_median)<-c("Method_name","Median_across_patients")
write.csv(AUC_sum,paste0('sim',sim,'/Combined_data_sums_onlyAUC.csv'),quote=F)
write.csv(AUC_median,paste0('sim',sim,'/Combined_data_median_onlyAUC.csv'),quote=F)
#calculate ACC only sums and medians
ACC_sum<-cbind(ACC$Method_name,rowSums(ACC[,-1])) %>% as.data.frame()
colnames(ACC_sum)<-c("Method_name","Sum_of_avg_across_patients")
ACC_sum$Sum_of_avg_across_patients<-as.numeric(ACC_sum$Sum_of_avg_across_patients)
ACC_median<-cbind(ACC$Method_name,apply(ACC[-1], 1, median)) %>% as.data.frame()
colnames(ACC_median)<-c("Method_name","Median_across_patients")
write.csv(ACC_sum,paste0('sim',sim,'/Combined_data_sums_onlyACC.csv'),quote=F)
write.csv(ACC_median,paste0('sim',sim,'/Combined_data_median_onlyACC.csv'),quote=F)
# calculate equal weights sums and medians
AUC_half<-cbind(sapply(AUC[,-1],"*",0.5))
ACC_half<-cbind(sapply(ACC[,-1],"*",0.5))
equal<-cbind( AUC_half + ACC_half)
equal_sum<-cbind(AUC$Method_name,rowSums(equal)) %>% as.data.frame()
colnames(equal_sum)<-c("Method_name","Sum_of_avg_across_patients")
equal_sum$Sum_of_avg_across_patients<-as.numeric(equal_sum$Sum_of_avg_across_patients)
equal_median<-cbind(AUC$Method_name,apply(equal, 1, median)) %>% as.data.frame()
colnames(equal_median)<-c("Method_name","Median_across_patients")
write.csv(equal,paste0('sim',sim,'/Combined_data_equal.csv'),quote=F)
write.csv(equal_sum,paste0('sim',sim,'/Combined_data_sums_equal.csv'),quote=F)
write.csv(equal_median,paste0('sim',sim,'/Combined_data_median_equal.csv'),quote=F)
#calculate 0.3 AUROC and 0.7 ACC weights sums and medians
AUC_low<-cbind(sapply(AUC[,-1],"*",0.3))
ACC_high<-cbind(sapply(ACC[,-1],"*",0.7))
three_seven<-cbind( AUC_low + ACC_high)
three_seven_sum<-cbind(AUC$Method_name,rowSums(three_seven)) %>% as.data.frame()
colnames(three_seven_sum)<-c("Method_name","Sum_of_avg_across_patients")
three_seven_sum$Sum_of_avg_across_patients<-as.numeric(three_seven_sum$Sum_of_avg_across_patients)
three_seven_median<-cbind(AUC$Method_name,apply(three_seven, 1, median)) %>% as.data.frame()
colnames(three_seven_median)<-c("Method_name","Median_across_patients")
write.csv(three_seven,paste0('sim',sim,'/Combined_data_37.csv'),quote=F)
write.csv(three_seven_sum,paste0('sim',sim,'/Combined_data_sums_37.csv'),quote=F)
write.csv(three_seven_median,paste0('sim',sim,'/Combined_data_median_37.csv'),quote=F)
#calculate 0.7 AUROC and 0.3 ACC weights sums and medianssums and medians}
AUC_high<-cbind(sapply(AUC[,-1],"*",0.7))
ACC_low<-cbind(sapply(ACC[,-1],"*",0.3))
seven_three<-cbind( AUC_low + ACC_high)
seven_three_sum<-cbind(AUC$Method_name,rowSums(seven_three)) %>% as.data.frame()
colnames(seven_three_sum)<-c("Method_name","Sum_of_avg_across_patients")
seven_three_sum$Sum_of_avg_across_patients<-as.numeric(seven_three_sum$Sum_of_avg_across_patients)
seven_three_median<-cbind(AUC$Method_name,apply(seven_three, 1, median)) %>% as.data.frame()
colnames(seven_three_median)<-c("Method_name","Median_across_patients")
write.csv(seven_three,paste0('sim',sim,'/Combined_data_73.csv'),quote=F)
write.csv(seven_three_sum,paste0('sim',sim,'/Combined_data_sums_73.csv'),quote=F)
write.csv(seven_three_median,paste0('sim',sim,'/Combined_data_median_73.csv'),quote=F)
}
Next, we compute the average Row Sums and Median values under the 5 AUROC and ACC weights.
patterns<-c("Combined_data_sums_onlyAUC","Combined_data_sums_onlyACC","Combined_data_sums_equal","Combined_data_sums_37","Combined_data_sums_73")
dir.create("averages")
for (pattern in patterns){
count_files <- list.files(recursive=TRUE,pattern=paste0("^",pattern,".csv"),full.names=TRUE)
d1<-data.frame()
for (i in count_files) {
# read file i as a data frame
f <- read.csv(i,row.names = 1)
if(length(d1) == 0){
d1 <- f
}else
{
d1$Sum_of_avg_across_patients<-d1$Sum_of_avg_across_patients+f$Sum_of_avg_across_patients
}
}
d1$Sum_of_avg_across_patients<-d1$Sum_of_avg_across_patients/28
d1$Sum_of_avg_across_patients<-d1$Sum_of_avg_across_patients/50
d1<- d1 %>% arrange(desc(Sum_of_avg_across_patients))
write.csv(d1,paste0("averages/Average_",pattern,".csv"),quote=F)
}
patterns<-c("Combined_data_median_onlyAUC","Combined_data_median_onlyACC","Combined_data_median_equal","Combined_data_median_37","Combined_data_median_73")
for (pattern in patterns){
count_files <- list.files(recursive=TRUE,pattern=paste0("^",pattern,".csv"),full.names=TRUE)
count_files<-count_files[!grepl("all", count_files)]
d1<-data.frame()
for (i in count_files) {
# read file i as a data frame
f <- read.csv(i,row.names = 1)
if(length(d1) == 0){
d1 <- f
}else
{
d1$Median_across_patients<-d1$Median_across_patients+f$Median_across_patients
}
}
d1$Median_across_patients<-d1$Median_across_patients/28
d1<- d1 %>% arrange(desc(Median_across_patients))
write.csv(d1,paste0("averages/Average_Median_",pattern,".csv"),quote=F)
}
rm(list = ls())
To identify the top performing network methods across all 28 networks and under different weights of the AUROC and ACC, we generate a heatmap with hierarchical clustering
pal<-rev(c("#FF0000","#000000","#031e33","#0b5394","#38761d","#ffe700","#FFFFFF"))
count_files <- list.files(path="averages",recursive=TRUE,pattern=paste0("Average_Combined"),full.names=TRUE)
d1<-data.frame()
for (i in count_files){
f<-read.csv(i,row.names = 1)
if(length(d1) == 0){
d1<-f
}
else{
d1<- merge(d1, f, by.x = "Method_name", by.y = "Method_name")
}
}
rownames(d1)<-d1$Method_name
colnames(d1)<-c("Method_name","AUC=0.3,ACC=0.7","AUC=0.7,ACC=0.3","AUC=0.5,ACC=0.5","AUC=0,ACC=1","AUC=1,ACC=0")
d1<- d1 %>% select(-Method_name)
heatmaply(d1,colors = pal,show_dendrogram = c(TRUE, FALSE),labCol=c("Weight<sub>AUC=0.3</sub>,Weight<sub>ACC=0.7</sub>","Weight<sub>AUC=0.7</sub>,Weight<sub>ACC=0.3</sub>","Weight<sub>AUC=0.5</sub>,Weight<sub>ACC=0.5</sub>","Weight<sub>AUC=0</sub>,Weight<sub>ACC=1</sub>","Weight<sub>AUC=1</sub>,Weight<sub>ACC=0</sub>"),main="Average Row Sums Value across 28 Networks",file = "heatmap_rowsums.pdf",width=800,height=1000,margins = c(40, 50)) %>%
layout(height = 900)
patterns<-c("Combined_data_median_onlyAUC","Combined_data_median_onlyACC","Combined_data_median_equal","Combined_data_median_37","Combined_data_median_73")
count_files <- list.files(path="averages",recursive=TRUE,pattern=paste0("Average_Median"),full.names=TRUE)
d1<-data.frame()
for (i in count_files){
f<-read.csv(i,row.names = 1)
if(length(d1) == 0){
d1<-f
}
else{
d1<- merge(d1, f, by.x = "Method_name", by.y = "Method_name")
}
}
rownames(d1)<-d1$Method_name
colnames(d1)<-c("Method_name","AUC=0.3,ACC=0.7","AUC=0.7,ACC=0.3","AUC=0.5,ACC=0.5","AUC=0,ACC=1","AUC=1,ACC=0")
d1<- d1 %>% select(-Method_name)
heatmaply(d1,colors = pal,show_dendrogram = c(TRUE, FALSE),labCol=c("Weight<sub>AUC=0.3</sub>,Weight<sub>ACC=0.7</sub>","Weight<sub>AUC=0.7</sub>,Weight<sub>ACC=0.3</sub>","Weight<sub>AUC=0.5</sub>,Weight<sub>ACC=0.5</sub>","Weight<sub>AUC=0</sub>,Weight<sub>ACC=1</sub>","Weight<sub>AUC=1</sub>,Weight<sub>ACC=0</sub>"),main="Average Median Value across 28 Networks",file = "heatmap_medians.pdf",width=800,height=1000,margins = c(40, 50)) %>%
layout(height = 900)
Using our heatmaps, we see that the top combined methods of network structure prediction are BF_PFmean, T_BCmean, T_BF_BCmean, T_BF_PF_BCmean, T_BF_PFmean, T_BFmean, T_PF_BCmean, T_PFmean. We can see how these compare to the individual methods (Tigress, BCorrU, BCohF, and PCohF) in all 28 networks individually using line plots
create_rowsum_lineplot<-function(pattern,methods){
count_files <- list.files(recursive=T,pattern=paste0("^",pattern),full.names=TRUE)
d1<-data.frame()
for (i in count_files) {
f <- read.csv(i,row.names = 1) %>% filter(Method_name %in% methods)
f<- cbind(f,c(rep(unlist(strsplit(i,'/'))[2],nrow(f))))
colnames(f)[3]<-c('time_series')
#if the counts object is empty just copy the f to m
if(length(d1) == 0){
d1 <- f
} else
{
#if the dataframe is not empty then merge the data
d1 <- rbind(d1,f)
}
}
d1$time_series<-gsub("sim","",d1$time_series)
d1$time_series<-str_pad(d1$time_series, 2, pad = "0")
colnames(d1)<-c("Method","Row Sum","Network")
d1$`Row Sum`<-d1$`Row Sum`/50
return(d1)
}
create_median_lineplot<-function(pattern,methods){
count_files <- list.files(recursive=T,pattern=paste0("^",pattern),full.names=TRUE)
d1<-data.frame()
for (i in count_files) {
f <- read.csv(i,row.names = 1) %>% filter(Method_name %in% methods)
f<- cbind(f,c(rep(unlist(strsplit(i,'/'))[2],nrow(f))))
colnames(f)[3]<-c('time_series')
#if the counts object is empty just copy the f to m
if(length(d1) == 0){
d1 <- f
} else
{
#if the dataframe is not empty then merge the data
d1 <- rbind(d1,f)
}
}
d1$time_series<-gsub("sim","",d1$time_series)
d1$time_series<-str_pad(d1$time_series, 2, pad = "0")
colnames(d1)<-c("Method","Median","Network")
return(d1)
}
We can also plot all 26 combined and individual methods on an interactive line plot